# Get data
truckee <- read_csv("clean_truckee_flow.csv")
# Convert to ts data
truckee_ts <- ts(truckee$mean_va, frequency = 12, start = c(2000,1))
# plot(truckee_ts)
# Decompose to explore data further
truckee_dc <- decompose(truckee_ts)
plot(truckee_dc)
Although there are some high values and low values in the data, there do not seem to be any outliers (which could have a disproportionate effect on the time series model). There seems to be a slight downward trend in the data and it looks non-stationary and additive. We see a seasonal pattern that repeats about every 12 months and a slight cyclical trend about every five years (the seasonal component is on the same scale as the orinigal data).
# Holt Winters exponential smoothing
truckee_hw <- HoltWinters(truckee_ts)
# plot(truckee_hw)
# Forecast Holt Winters
truckee_forecast <- forecast(truckee_hw, h = 60)
# plot(truckee_forecast)
# Autoregressive integrated moving average (ARIMA) for comparison
# estimate pdq
truckee_pdq <- auto.arima(truckee_ts) # [2,1,1,][0,0,2]
# fit the ARIMA model
truckee_arima <- arima(truckee_ts, order = c(2,1,1), seasonal = list(order = c(0,0,2)))
# evaluate residuals
# par(mfrow = c(1,2))
# hist(truckee_arima$residuals)
# qqnorm(truckee_arima$residuals) # looks normal
# forecast ARIMA
forecast_truckee <- forecast(truckee_arima, h = 60)
# plot(forecast_truckee)
# Graph of Holt Winters
plot(truckee_forecast,
xlab = "Time",
ylab = "Truckee River Flows (cubic feet per second)")
par(mfrow = c(1,2))
hist(truckee_forecast$residuals)
qqnorm(truckee_forecast$residuals) # Looks relatively normally distributed.
# Read in nps data
nps_ca <- read_sf(dsn = ".", layer = "nps_boundary") %>%
filter(STATE == "CA",
UNIT_TYPE == "National Park") %>%
dplyr::select(UNIT_NAME) %>%
rename(Name = UNIT_NAME)
st_crs(nps_ca) = 4326
# Read in CA county data
ca_counties <- read_sf(dsn = ".", layer = "california_county_shape_file")
st_crs(ca_counties) = 4326
# Map it!
map_nps_ca <- tm_shape(nps_ca) +
tm_fill("Name", palette = "Dark2", alpha = 0.5) +
tm_shape(ca_counties) +
tm_basemap("Esri.WorldPhysical") +
tm_legend(show = FALSE) +
tm_borders()
tmap_mode("view")
map_nps_ca
# Read in data and initial wrangling
lizard <- read_csv("clean_lizard.csv") %>%
filter(site == "CALI") %>%
replace_with_na_all(condition = ~.x == ".") %>%
filter(sex == "F" | sex == "M") %>%
drop_na(weight)
# Coerce weights to be numeric
lizard$weight <- as.numeric(lizard$weight)
# Visualize data
# ggplot(lizard, aes(x = weight, fill = sex)) +
# geom_histogram(alpha = 0.5, position = "identity")
# Both male and female weights are skewed. However, the central limit theorem applies here - distribution of means will be normally distributed (we have more than 30 observations in each group)
# Vector of adult male lizard weights
male <- lizard %>%
filter(sex == "M") %>%
pull(weight)
# Vector of adult female lizard weights
female <- lizard %>%
filter(sex == "F") %>%
pull(weight)
# F test for equal variance
# H0: Ratio of variances (Variance A/Variance B) = 1
# HA: Ratio of variances is NOT = 1
lizard_f <- var.test(female, male)
# p-value = 0.29, retain the null hypothesis of equal variance
# T sample t-test
# H0: Mean weights of adult female and male lizards are equal
# HA: Mean weights of adult female and male lizards are NOT equal
lizard_t <- t.test(female, male, var.equal = TRUE)
# p-value = 0.43, retain the null hypothesis. For lizards trapped at the CALI site, mean weights of female and male adult lizards do not differ significantly.
# Mean weights and Cohen's d - effect size
male_lizard <- lizard %>%
dplyr::select(sex, weight) %>%
filter(sex == "M")
female_lizard <- lizard %>%
dplyr::select(sex, weight) %>%
filter(sex == "F")
male_mean <- mean(male_lizard$weight) # 4.96
male_sd <- sd(male_lizard$weight) # 5.68
female_mean <- mean(female_lizard$weight) # 6.50
female_sd <- sd(female_lizard$weight) #5.83
# Mean weight difference
mean_diff <- female_mean - male_mean # 0.86
effect_size <- cohen.d(weight ~ sex, data = lizard)
# 0.14 neglibile
Mean adult female lizard weights (6.50 g ± 5.83 g, n = 75) and adult male lizard weights (4.96 g ± 5.68 g, n = 57) for lizards trapped at the Caliche creosotebush site did not differ significantly [t(130) = 0.8, p = 0.427, \(\alpha\) = 0.05]. The effect size is negligible (Cohen’s d = 0.14) and the difference in mean weight between female and male lizards is 0.86 grams.
lizard_tails <- lizard %>%
dplyr::select(sex, tail) %>%
drop_na(tail)
# Chi-square
# H0: There is no significant difference between proportion of females and males with broken tails
# HA: There is a significant difference between proportion of females and males with broken tails
chi_tail <- lizard_tails %>%
count(sex, tail) %>%
spread(tail, n) %>%
dplyr::select(-sex)
rownames(chi_tail) <- c("F","M")
#Actual proportions:
tail_prop <- prop.table(as.matrix(chi_tail), 1)
tail_x2 <- chisq.test(chi_tail) # Retain the null, there is no significant difference in proportions of females and males with broken tails at the CALI site x2 = 0.163, df = 1, p-value = 0.69
There is no significant difference in the proportion of observed adult female (0.23) and male lizards (0.18) with broken tails at the Caliche creosotebush site (\(\chi^2\){1} = 0.16, p = 0.69).